## Likelihood, dynamic --------------

# Define the likelihood function that will be used for estimation.
fedoc_ll <- function(theta,k,y0,y1,y2,y3,X1,X2,X3,
                     h = 1 #bandwidth
                     ){

  # k is the cutoff for the lagged dependent variable

  # Deconstruct the parameter
  K <- ncol(X1)
  b <- theta[1:K]
  rho <- theta[K+1]

  if(length(theta) == (K+1)){
    binary_choice <- TRUE
  } else{
    binary_choice <- FALSE
    h_inv <- theta[-(1:(K+1))]
  }

  # What's J?
  J <- length(unique(c(y0,y1,y2,y3)))
  ifelse(min(unique(c(y0,y1,y2,y3)))==1,
         "OK. Min is 1.", 
         stop("Low value not 1."))
  ifelse(max(unique(c(y0,y1,y2,y3)))==J,
         "OK. Max is J.", 
         stop("High value not J."))
  ifelse(length(theta)==(K+1+J-2),
         "OK. Theta length.", 
         stop("Incorrect theta length."))

  d_X_discrete <- matrix(apply((X2[,-1]-X3[,-1])^2,MARGIN = 1,sum),ncol=1) 
  discrete_fixed <- ifelse(d_X_discrete == 0,1,0)
  kernel_input <- (X2[,1]-X3[,1])/h
  # Gaussian kernel
  X_fixed <- dnorm(kernel_input)*discrete_fixed

  for(j in 2:k){
    for(l in k:J){

      d0 <- y0>=k
      d1 <- y1>=k
      d2l <- y2>=l
      d2j <- y2>=j
      d3j <- y3>=j
      d3l <- y3>=l
      d3jl <- (1-d1)*d3j + d1*d3l

      y_switch <- (1-d1)*d2l + d1*(1-d2j)
      cjl <- y_switch*X_fixed

      # The matrix Z consists of time dummies that
      #  have coefficient equal to the thresholds.
      if(binary_choice){
        # If its binary choice, then there are no (non-normalized) thresholds.
        dX <- cbind(X1-X2,d0-d3jl)
      } else{

        # Construct a matrix of indicators, whose regression coefficients
        #   are the thresholds to be determined.
        Z <- matrix(0,nrow=nrow(X2),ncol=J-2)
        if(j<k){
          Z[,j-1] <- d3jl
        }
        if(l>k){
          Z[,l-2] <- Z[,l-2] + (1-d3jl)
        }
        dX <- cbind(X1-X2,d0-d3jl,Z)  
      }

      # Construct the index.
      Xb <- dX %*% matrix(theta,ncol=1)
      logitXb <- plogis(Xb)

      # Function value
      res_jl <- -mean(cjl*(d1*log(logitXb)+(1-d1)*log(1-logitXb)))

      # Gradient
      s_noX <- cjl*(d1-logitXb)
      s <- matrix(rep(s_noX,K+1+J-2),ncol=K+1+J-2) * dX
      attr(res_jl,"gradient") <- -colMeans(s)
      #
      # Hessian
      H_noX <- -cjl*logitXb*(1-logitXb)
      H <- t(matrix(rep(H_noX,K+1+J-2),ncol=K+1+J-2) * dX) %*% dX / length(y2)
      attr(res_jl,"hessian") <- -H

      if((j==2) & (l==k)){

        res <- res_jl

      } else {

        res <- res + res_jl
        attr(res,"gradient") <- attr(res,"gradient") +
          attr(res_jl,"gradient")
        attr(res,"hessian") <- attr(res,"hessian") +
          attr(res_jl,"hessian")


      }

    }
  }

  return(res)
}

## Likelihood, static --------------

# Define the binary choice one first.

fe_logit_ll_gH <- function(b,y1,y2,dX){
  # Common
  K <- length(b)
  Xb <- dX %*% matrix(b,ncol=1)
  logitXb <- plogis(Xb)
  switcher = ifelse(y1==y2,0,1)

  # Function value
  res <- -mean(switcher*(y2*log(logitXb)+(1-y2)*log(1-logitXb)))

  # Gradient
  s_noX <- switcher*(y2-logitXb)
  s <- matrix(rep(s_noX,K),ncol=K) * dX
  attr(res,"gradient") <- -colMeans(s)
  # 
  # Hessian
  H_noX <- -switcher*logitXb*(1-logitXb)
  H <- t(matrix(rep(H_noX,K),ncol=K) * dX) %*% dX / length(y2)
  attr(res,"hessian") <- -H
  
  return(res)
}

# The ordered one is built on top of that.
feoc_ll_gH <- function(theta,y1,y2,dX){
  # Common
  K <- ncol(dX)
  b <- theta[1:K]

  # What's J?
  J <- length(unique(c(y1,y2)))

  ifelse(min(unique(c(y1,y2)))==1,
         "OK. Min is 1.", 
         stop("Low value not 1."))
  ifelse(max(unique(c(y1,y2)))==J,
         "OK. Max is J.", 
         stop("High value not J."))
  ifelse(length(theta)==(K+J-2),
         "OK. Theta length J-2.", 
         stop("Incorrect theta length."))

  makeZ <- function(j1,j2){
    dummy_t <- matrix(0,nrow = nrow(dX),ncol = J-2)
    dummy_1 <- dummy_t
    if(j1>1){
      dummy_1[,j1-2] <- 1
    }
    dummy_2 <- dummy_t
    if(j2>1){
      dummy_2[,j2-2] <- 1
    }
    return(cbind(dX,dummy_1-dummy_2))
  }

  for(j1 in 2:J){
    for(j2 in 2:J){

      d1 <- y1>=j1
      d2 <- y2>=j2
      res_j1j2 <- fe_logit_ll_gH(theta,d1,d2,makeZ(j1,j2))

      if((j1==2) & (j2==2)){

        res <- res_j1j2

      } else {

        res <- res + res_j1j2
        attr(res,"gradient") <- attr(res,"gradient") +
          attr(res_j1j2,"gradient")
        attr(res,"hessian") <- attr(res,"hessian") +
          attr(res_j1j2,"hessian")

      }

    }
  }

  return(res)
}
